1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package net.sf.jmimemagic;
24
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27 import org.apache.oro.text.perl.Perl5Util;
28
29 import java.io.File;
30 import java.io.IOException;
31 import java.io.RandomAccessFile;
32
33 import java.nio.ByteBuffer;
34 import java.nio.ByteOrder;
35
36 import java.util.ArrayList;
37 import java.util.Collection;
38 import java.util.Iterator;
39
40
41 /***
42 * This class represents a single match test
43 *
44 * @author $Author: arimus $
45 * @version $Revision: 1.1 $
46 */
47 public class MagicMatcher implements Cloneable
48 {
49 private static Log log = LogFactory.getLog(MagicMatcher.class);
50 private ArrayList subMatchers = new ArrayList(0);
51 private MagicMatch match = null;
52
53 /***
54 * constructor
55 */
56 public MagicMatcher()
57 {
58 log.debug("instantiated");
59 }
60
61 /***
62 * DOCUMENT ME!
63 *
64 * @param match DOCUMENT ME!
65 */
66 public void setMatch(MagicMatch match)
67 {
68 log.debug("setMatch()");
69 this.match = match;
70 }
71
72 /***
73 * DOCUMENT ME!
74 *
75 * @return DOCUMENT ME!
76 */
77 public MagicMatch getMatch()
78 {
79 log.debug("getMatch()");
80
81 return this.match;
82 }
83
84 /***
85 * test to see if everything is in order for this match
86 *
87 * @return whether or not this match has enough data to be valid
88 */
89 public boolean isValid()
90 {
91 log.debug("isValid()");
92
93 if ((match == null) || (match.getTest() == null)) {
94 return false;
95 }
96
97 String type = new String(match.getTest().array());
98 char comparator = match.getComparator();
99 String description = match.getDescription();
100 String test = new String(match.getTest().array());
101
102 if ((type != null) && !type.equals("") && (comparator != '\0') &&
103 ((comparator == '=') || (comparator == '!') || (comparator == '>') ||
104 (comparator == '<')) && (description != null) && !description.equals("") &&
105 (test != null) && !test.equals("")) {
106 return true;
107 }
108
109 return false;
110 }
111
112 /***
113 * add a submatch to this magic match
114 *
115 * @param m a magic match
116 */
117 public void addSubMatcher(MagicMatcher m)
118 {
119 log.debug("addSubMatcher()");
120 subMatchers.add(m);
121 }
122
123 /***
124 * set all submatches
125 *
126 * @param a a collection of submatches
127 */
128 public void setSubMatchers(Collection a)
129 {
130 log.debug("setSubMatchers(): for match '" + match.getDescription() + "'");
131 subMatchers.clear();
132 subMatchers.addAll(a);
133 }
134
135 /***
136 * get all submatches for this magic match
137 *
138 * @return a collection of submatches
139 */
140 public Collection getSubMatchers()
141 {
142 log.debug("getSubMatchers()");
143
144 return subMatchers;
145 }
146
147 /***
148 * test to see if this match or any submatches match
149 *
150 * @param f the file that should be used to test the match
151 * @param onlyMimeMatch DOCUMENT ME!
152 *
153 * @return the deepest magic match object that matched
154 *
155 * @throws IOException DOCUMENT ME!
156 * @throws UnsupportedTypeException DOCUMENT ME!
157 */
158 public MagicMatch test(File f, boolean onlyMimeMatch)
159 throws IOException, UnsupportedTypeException
160 {
161 log.debug("test(File)");
162
163 int offset = match.getOffset();
164 String description = match.getDescription();
165 String type = match.getType();
166 String mimeType = match.getMimeType();
167
168 log.debug("test(File): testing '" + f.getName() + "' for '" + description + "'");
169
170 log.debug("test(File): \n=== BEGIN MATCH INFO ==");
171 log.debug(match.print());
172 log.debug("test(File): \n=== END MATCH INFO ====\n");
173
174 RandomAccessFile file = null;
175 file = new RandomAccessFile(f, "r");
176
177 try {
178 int length = 0;
179
180 if (type.equals("byte")) {
181 length = 1;
182 } else if (type.equals("short") || type.equals("leshort") || type.equals("beshort")) {
183 length = 4;
184 } else if (type.equals("long") || type.equals("lelong") || type.equals("belong")) {
185 length = 8;
186 } else if (type.equals("string")) {
187 length = match.getTest().capacity();
188 } else if (type.equals("regex")) {
189 length = (int) file.length() - offset;
190
191 if (length < 0) {
192 length = 0;
193 }
194 } else if (type.equals("detector")) {
195 length = (int) file.length() - offset;
196
197 if (length < 0) {
198 length = 0;
199 }
200 } else {
201 throw new UnsupportedTypeException("unsupported test type '" + type + "'");
202 }
203
204
205 if (length > (file.length() - offset)) {
206 return null;
207 }
208
209 byte[] buf = new byte[length];
210 file.seek(offset);
211
212 int bytesRead = 0;
213 int size = 0;
214 boolean gotAllBytes = false;
215 boolean done = false;
216
217 while (!done) {
218 size = file.read(buf, 0, length - bytesRead);
219
220 if (size == -1) {
221 throw new IOException("reached end of file before all bytes were read");
222 }
223
224 bytesRead += size;
225
226 if (bytesRead == length) {
227 gotAllBytes = true;
228 done = true;
229 }
230 }
231
232 log.debug("test(File): stream size is '" + buf.length + "'");
233
234 MagicMatch match = null;
235 MagicMatch submatch = null;
236
237 if (testInternal(buf)) {
238
239 match = getMatch();
240
241 log.debug("test(File): testing matched '" + description + "'");
242
243
244 if ((onlyMimeMatch == false) && (subMatchers != null) && (subMatchers.size() > 0)) {
245 log.debug("test(File): testing " + subMatchers.size() + " submatches for '" +
246 description + "'");
247
248 for (int i = 0; i < subMatchers.size(); i++) {
249 log.debug("test(File): testing submatch " + i);
250
251 MagicMatcher m = (MagicMatcher) subMatchers.get(i);
252
253 if ((submatch = m.test(f, false)) != null) {
254 log.debug("test(File): submatch " + i + " matched with '" +
255 submatch.getDescription() + "'");
256 match.addSubMatch(submatch);
257 } else {
258 log.debug("test(File): submatch " + i + " doesn't match");
259 }
260 }
261 }
262 }
263
264 return match;
265 } finally {
266 try {
267 file.close();
268 } catch (Exception fce) {
269 }
270 }
271 }
272
273 /***
274 * test to see if this match or any submatches match
275 *
276 * @param data the data that should be used to test the match
277 * @param onlyMimeMatch DOCUMENT ME!
278 *
279 * @return the deepest magic match object that matched
280 *
281 * @throws IOException DOCUMENT ME!
282 * @throws UnsupportedTypeException DOCUMENT ME!
283 */
284 public MagicMatch test(byte[] data, boolean onlyMimeMatch)
285 throws IOException, UnsupportedTypeException
286 {
287 log.debug("test(byte[])");
288
289 int offset = match.getOffset();
290 String description = match.getDescription();
291 String type = match.getType();
292 String test = new String(match.getTest().array());
293 String mimeType = match.getMimeType();
294
295 log.debug("test(byte[]): testing byte[] data for '" + description + "'");
296
297 log.debug("test(byte[]): \n=== BEGIN MATCH INFO ==");
298 log.debug(match.print());
299 log.debug("test(byte[]): \n=== END MATCH INFO ====\n");
300
301 int length = 0;
302
303 if (type.equals("byte")) {
304 length = 1;
305 } else if (type.equals("short") || type.equals("leshort") || type.equals("beshort")) {
306 length = 4;
307 } else if (type.equals("long") || type.equals("lelong") || type.equals("belong")) {
308 length = 8;
309 } else if (type.equals("string")) {
310 length = match.getTest().capacity();
311 } else if (type.equals("regex")) {
312
313 length = data.length - offset - 1;
314
315 if (length < 0) {
316 length = 0;
317 }
318 } else if (type.equals("detector")) {
319
320 length = data.length - offset - 1;
321
322 if (length < 0) {
323 length = 0;
324 }
325 } else {
326 throw new UnsupportedTypeException("unsupported test type " + type);
327 }
328
329 byte[] buf = new byte[length];
330 log.debug("test(byte[]): offset=" + offset + ",length=" + length + ",data length=" +
331 data.length);
332
333 if ((offset + length) < data.length) {
334 System.arraycopy(data, offset, buf, 0, length);
335
336 log.debug("test(byte[]): stream size is '" + buf.length + "'");
337
338 MagicMatch match = null;
339 MagicMatch submatch = null;
340
341 if (testInternal(buf)) {
342
343 match = getMatch();
344
345 log.debug("test(byte[]): testing matched '" + description + "'");
346
347
348 if ((onlyMimeMatch == false) && (subMatchers != null) && (subMatchers.size() > 0)) {
349 log.debug("test(byte[]): testing " + subMatchers.size() + " submatches for '" +
350 description + "'");
351
352 for (int i = 0; i < subMatchers.size(); i++) {
353 log.debug("test(byte[]): testing submatch " + i);
354
355 MagicMatcher m = (MagicMatcher) subMatchers.get(i);
356
357 if ((submatch = m.test(data, false)) != null) {
358 log.debug("test(byte[]): submatch " + i + " matched with '" +
359 submatch.getDescription() + "'");
360 match.addSubMatch(submatch);
361 } else {
362 log.debug("test(byte[]): submatch " + i + " doesn't match");
363 }
364 }
365 }
366 }
367
368 return match;
369 } else {
370 return null;
371 }
372 }
373
374 /***
375 * internal test switch
376 *
377 * @param data DOCUMENT ME!
378 * @return DOCUMENT ME!
379 */
380 private boolean testInternal(byte[] data)
381 {
382 log.debug("testInternal(byte[])");
383
384 if (data.length == 0) {
385 return false;
386 }
387
388 String type = match.getType();
389 String test = new String(match.getTest().array());
390 String mimeType = match.getMimeType();
391 String description = match.getDescription();
392
393 ByteBuffer buffer = ByteBuffer.allocate(data.length);
394
395 if ((type != null) && (test != null) && (test.length() > 0)) {
396 if (type.equals("string")) {
397 buffer = buffer.put(data);
398
399 return testString(buffer);
400 } else if (type.equals("byte")) {
401 buffer = buffer.put(data);
402
403 return testByte(buffer);
404 } else if (type.equals("short")) {
405 buffer = buffer.put(data);
406
407 return testShort(buffer);
408 } else if (type.equals("leshort")) {
409 buffer = buffer.put(data);
410 buffer.order(ByteOrder.LITTLE_ENDIAN);
411
412 return testShort(buffer);
413 } else if (type.equals("beshort")) {
414 buffer = buffer.put(data);
415 buffer.order(ByteOrder.BIG_ENDIAN);
416
417 return testShort(buffer);
418 } else if (type.equals("long")) {
419 buffer = buffer.put(data);
420
421 return testLong(buffer);
422 } else if (type.equals("lelong")) {
423 buffer = buffer.put(data);
424 buffer.order(ByteOrder.LITTLE_ENDIAN);
425
426 return testLong(buffer);
427 } else if (type.equals("belong")) {
428 buffer = buffer.put(data);
429 buffer.order(ByteOrder.BIG_ENDIAN);
430
431 return testLong(buffer);
432 } else if (type.equals("regex")) {
433 return testRegex(new String(data));
434 } else if (type.equals("detector")) {
435 buffer = buffer.put(data);
436
437 return testDetector(buffer);
438
439
440
441
442
443
444
445 } else {
446 log.error("testInternal(byte[]): invalid test type '" + type + "'");
447 }
448 } else {
449 log.error("testInternal(byte[]): type or test is empty for '" + mimeType + " - " +
450 description + "'");
451 }
452
453 return false;
454 }
455
456 /***
457 * test the data against the test byte
458 *
459 * @param data the data we are testing
460 *
461 * @return if we have a match
462 */
463 private boolean testByte(ByteBuffer data)
464 {
465 log.debug("testByte()");
466
467 String test = new String(match.getTest().array());
468 char comparator = match.getComparator();
469 long bitmask = match.getBitmask();
470
471 String s = test;
472 byte b = data.get(0);
473 b = (byte) (b & bitmask);
474 log.debug("testByte(): decoding '" + test + "' to byte");
475
476 int tst = Integer.decode(test).byteValue();
477 byte t = (byte) (tst & 0xff);
478 log.debug("testByte(): applying bitmask '" + bitmask + "' to '" + tst + "', result is '" +
479 t + "'");
480 log.debug("testByte(): comparing byte '" + b + "' to '" + t + "'");
481
482 switch (comparator) {
483 case '=':
484 return t == b;
485
486 case '!':
487 return t != b;
488
489 case '>':
490 return t > b;
491
492 case '<':
493 return t < b;
494 }
495
496 return false;
497 }
498
499 /***
500 * test the data against the byte array
501 *
502 * @param data the data we are testing
503 *
504 * @return if we have a match
505 */
506 private boolean testString(ByteBuffer data)
507 {
508 log.debug("testString()");
509
510 ByteBuffer test = match.getTest();
511 char comparator = match.getComparator();
512
513 byte[] b = data.array();
514 byte[] t = test.array();
515
516 boolean diff = false;
517 int i = 0;
518
519 for (i = 0; i < t.length; i++) {
520 log.debug("testing byte '" + b[i] + "' from '" + new String(data.array()) +
521 "' against byte '" + t[i] + "' from '" + new String(test.array()) + "'");
522
523 if (t[i] != b[i]) {
524 diff = true;
525
526 break;
527 }
528 }
529
530 switch (comparator) {
531 case '=':
532 return !diff;
533
534 case '!':
535 return diff;
536
537 case '>':
538 return t[i] > b[i];
539
540 case '<':
541 return t[i] < b[i];
542 }
543
544 return false;
545 }
546
547 /***
548 * test the data against a short
549 *
550 * @param data the data we are testing
551 *
552 * @return if we have a match
553 */
554 private boolean testShort(ByteBuffer data)
555 {
556 log.debug("testShort()");
557
558 short val = 0;
559 String test = new String(match.getTest().array());
560 char comparator = match.getComparator();
561 long bitmask = match.getBitmask();
562
563 val = byteArrayToShort(data);
564
565
566 val = (short) (val & (short) bitmask);
567
568 short tst = 0;
569
570 try {
571 tst = Integer.decode(test).shortValue();
572 } catch (NumberFormatException e) {
573 log.error("testShort(): " + e);
574
575 return false;
576
577
578
579
580 }
581
582 log.debug("testShort(): testing '" + Long.toHexString(val) + "' against '" +
583 Long.toHexString(tst) + "'");
584
585 switch (comparator) {
586 case '=':
587 return val == tst;
588
589 case '!':
590 return val != tst;
591
592 case '>':
593 return val > tst;
594
595 case '<':
596 return val < tst;
597 }
598
599 return false;
600 }
601
602 /***
603 * test the data against a long
604 *
605 * @param data the data we are testing
606 *
607 * @return if we have a match
608 */
609 private boolean testLong(ByteBuffer data)
610 {
611 log.debug("testLong()");
612
613 long val = 0;
614 String test = new String(match.getTest().array());
615 char comparator = match.getComparator();
616 long bitmask = match.getBitmask();
617
618 val = byteArrayToLong(data);
619
620
621 val = val & bitmask;
622
623 long tst = Long.decode(test).longValue();
624
625 log.debug("testLong(): testing '" + Long.toHexString(val) + "' against '" + test +
626 "' => '" + Long.toHexString(tst) + "'");
627
628 switch (comparator) {
629 case '=':
630 return val == tst;
631
632 case '!':
633 return val != tst;
634
635 case '>':
636 return val > tst;
637
638 case '<':
639 return val < tst;
640 }
641
642 return false;
643 }
644
645 /***
646 * test the data against a regex
647 *
648 * @param text the data we are testing
649 *
650 * @return if we have a match
651 */
652 private boolean testRegex(String text)
653 {
654 log.debug("testRegex()");
655
656 String test = new String(match.getTest().array());
657 char comparator = match.getComparator();
658
659 Perl5Util utility = new Perl5Util();
660 log.debug("testRegex(): searching for '" + test + "'");
661
662 if (comparator == '=') {
663 if (utility.match(test, text)) {
664 return true;
665 } else {
666 return false;
667 }
668 } else if (comparator == '!') {
669 if (utility.match(test, text)) {
670 return false;
671 } else {
672 return true;
673 }
674 }
675
676 return false;
677 }
678
679 /***
680 * test the data using a detector
681 *
682 * @param data the data we are testing
683 *
684 * @return if we have a match
685 */
686 private boolean testDetector(ByteBuffer data)
687 {
688 log.debug("testDetector()");
689
690 String detectorClass = new String(match.getTest().array());
691
692 try {
693 log.debug("loading class: " + detectorClass);
694
695 Class c = Class.forName(detectorClass);
696 MagicDetector detector = (MagicDetector) c.newInstance();
697 String[] types = detector.process(data.array(), match.getOffset(), match.getLength(),
698 match.getBitmask(), match.getComparator(), match.getMimeType(),
699 match.getProperties());
700
701 if ((types != null) && (types.length > 0)) {
702
703 match.setMimeType(types[0]);
704
705 return true;
706 }
707 } catch (ClassNotFoundException e) {
708 log.error("failed to load detector: " + detectorClass, e);
709 } catch (InstantiationException e) {
710 log.error("specified class is not a valid detector class: " + detectorClass, e);
711 } catch (IllegalAccessException e) {
712 log.error("specified class cannot be accessed: " + detectorClass, e);
713 }
714
715 return false;
716 }
717
718 /***
719 * Get the extensions for the underlying detectory
720 *
721 * @return DOCUMENT ME!
722 */
723 public String[] getDetectorExtensions()
724 {
725 log.debug("testDetector()");
726
727 String detectorClass = new String(match.getTest().array());
728
729 try {
730 log.debug("loading class: " + detectorClass);
731
732 Class c = Class.forName(detectorClass);
733 MagicDetector detector = (MagicDetector) c.newInstance();
734
735 return detector.getHandledTypes();
736 } catch (ClassNotFoundException e) {
737 log.error("failed to load detector: " + detectorClass, e);
738 } catch (InstantiationException e) {
739 log.error("specified class is not a valid detector class: " + detectorClass, e);
740 } catch (IllegalAccessException e) {
741 log.error("specified class cannot be accessed: " + detectorClass, e);
742 }
743
744 return new String[0];
745 }
746
747 /***
748 * encode a byte as an octal string
749 *
750 * @param b a byte of data
751 *
752 * @return an octal representation of the byte data
753 */
754 private String byteToOctalString(byte b)
755 {
756 int n1;
757 int n2;
758 int n3;
759 n1 = (b / 32) & 7;
760 n2 = (b / 8) & 7;
761 n3 = b & 7;
762
763 return String.valueOf(n1) + String.valueOf(n2) + String.valueOf(n3);
764 }
765
766 /***
767 * convert a byte array to a short
768 *
769 * @param data buffer of byte data
770 *
771 * @return byte array converted to a short
772 */
773 private short byteArrayToShort(ByteBuffer data)
774 {
775 return data.getShort(0);
776 }
777
778 /***
779 * convert a byte array to a long
780 *
781 * @param data buffer of byte data
782 *
783 * @return byte arrays (high and low bytes) converted to a long value
784 */
785 private long byteArrayToLong(ByteBuffer data)
786 {
787 return (long) data.getInt(0);
788 }
789
790 /***
791 * DOCUMENT ME!
792 *
793 * @return DOCUMENT ME!
794 *
795 * @throws CloneNotSupportedException DOCUMENT ME!
796 */
797 protected Object clone()
798 throws CloneNotSupportedException
799 {
800 MagicMatcher clone = new MagicMatcher();
801
802 clone.setMatch((MagicMatch) match.clone());
803
804 Iterator i = subMatchers.iterator();
805 ArrayList sub = new ArrayList();
806
807 while (i.hasNext()) {
808 MagicMatcher m = (MagicMatcher) i.next();
809 sub.add(m.clone());
810 }
811
812 clone.setSubMatchers(sub);
813
814 return clone;
815 }
816 }